Part I. Healthcare_Db_Mgmt Visualize Results from Exported SQL Tables.

Goal: answer some basic questions regarding the datasets that we generated in SQL. This code requires that the tables “act10_data or act10_food are exported from SQL

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(lubridate)
library(tidyr)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(forcats)
# assumes you have the two tables in current directory

setwd("~/Documents/DB_MGMG_1012024/final_db_sql_r_csv_exports/act_data")

act10_data <- read_csv("act10_data.csv") 
## Rows: 2148 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): dexcom_id, event_type, source_device_id
## dbl  (5): transmitter_time, glucose_value_mgdl, avg_magnitude, average_hr, a...
## dttm (2): dexcom_datetime, activity_datetime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
prepare_data <- function(df) {
  df %>%
    mutate(
      dexcom_datetime = ymd_hms(dexcom_datetime),
      activity_datetime = ymd_hms(activity_datetime),
      hour = hour(dexcom_datetime),
      date = date(dexcom_datetime)
    )
}

# example for individual #10

prepare_data(act10_data)
## # A tibble: 2,148 × 12
##    dexcom_id  dexcom_datetime     transmitter_time glucose_value_mgdl event_type
##    <chr>      <dttm>                         <dbl>              <dbl> <chr>     
##  1 2020-03-2… 2020-03-22 11:24:17             8100                103 EGV       
##  2 2020-03-2… 2020-03-22 11:29:17             8400                102 EGV       
##  3 2020-03-2… 2020-03-22 11:34:17             8700                100 EGV       
##  4 2020-03-2… 2020-03-22 11:39:17             9000                 98 EGV       
##  5 2020-03-2… 2020-03-22 11:44:17             9300                 94 EGV       
##  6 2020-03-2… 2020-03-22 11:49:18             9600                 95 EGV       
##  7 2020-03-2… 2020-03-22 11:54:17             9900                 97 EGV       
##  8 2020-03-2… 2020-03-22 11:59:18            10200                 99 EGV       
##  9 2020-03-2… 2020-03-22 12:04:18            10500                 97 EGV       
## 10 2020-03-2… 2020-03-22 12:09:18            10800                 97 EGV       
## # ℹ 2,138 more rows
## # ℹ 7 more variables: source_device_id <chr>, activity_datetime <dttm>,
## #   avg_magnitude <dbl>, average_hr <dbl>, activity_intensity_score <dbl>,
## #   hour <int>, date <date>

Glucose vs Heart Rate for Participant # 10 with exported .csv “act10_data.csv” as the Activity_Glucose Dataset

plot_glucose_hr <- function(df) {
  ggplot(df, aes(x = average_hr, y = glucose_value_mgdl, 
                 color = activity_intensity_score)) +
    geom_point(alpha = 0.6) +
    scale_color_viridis_c() +
    theme_minimal() +
    labs(
      title = "Glucose vs Heart Rate",
      x = "Average Heart Rate",
      y = "Glucose (mg/dL)",
      color = "Activity\nIntensity"
    )
}

plot_glucose_hr(act10_data)
## Warning: Removed 125 rows containing missing values or values outside the scale range
## (`geom_point()`).

Compare Glucose Levels vs Carbohydrate Content for Particpant 10 “act10_food.csv” exported datset.

## Rows: 2148 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (7): dexcom_id, event_type, source_device_id, food_log_id, logged_food...
## dbl  (10): transmitter_time, glucose_value_mgdl, food_log_participant_id, am...
## lgl   (1): food_log_time_end
## dttm  (1): dexcom_datetime
## date  (1): food_log_date
## time  (1): food_log_time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `food_log_datetime = ymd_hms(paste(food_log_date,
##   food_log_time))`.
## Caused by warning:
## !  2073 failed to parse.
plot_nutrient_impact <- function(df) {
  df %>%
    ggplot(aes(x = total_carb, y = glucose_value_mgdl)) +
    geom_point(aes(size = calorie, color = sugar)) +
    geom_smooth(method = "lm", se = TRUE) +
    scale_color_viridis_c("Sugar (g)") +
    facet_wrap(~meal_category) +
    theme_minimal() +
    labs(
      title = "Glucose Levels vs Carbohydrate Content",
      x = "Total Carbohydrates (g)",
      y = "Glucose (mg/dL)",
      size = "Calories"
    )
}

plot_nutrient_impact(prepped_food_10)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2073 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2073 rows containing missing values or values outside the scale range
## (`geom_point()`).

Plot Daily Patterns of Glucose Levels for Participant #10

# 5. Time-of-Day Food and Glucose Patterns
# need to have cleaned object ready for this function to work
# this graph is for participant 10 and not as striking as participant 2

plot_daily_patterns <- function(df) {
  df %>%
    ggplot(aes(x = hour(food_log_datetime), y = glucose_value_mgdl)) +
    geom_point(aes(color = total_carb, size = calorie), alpha = 0.6) +
    geom_smooth(method = "loess", color = "blue") +
    scale_color_viridis_c("Total Carbs (g)") +
    theme_minimal() +
    labs(
      title = "Daily Glucose Patterns with Meal Information",
      x = "Hour of Day",
      y = "Glucose (mg/dL)",
      size = "Calories"
    ) +
    scale_x_continuous(breaks = 0:23)
}

plot_daily_patterns(prepped_food_10)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2073 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2073 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Create an interactive plot using Plotly, which is perhaps giving you more flexibilty to review the nutritational data for a participant of interest. This requires creating summary statistics. Use Exported .csv for Participant 10 as an example.

# this function could also benefit from a SQL based query rather than individuall taking one participant at a time.

# requires the prepped_food_10 dataframe

# install.packages("plotly")
# library(plotly)

calculate_meal_stats <- function(df) {
  df %>%
    group_by(logged_food) %>%
    summarize(
      avg_glucose = mean(glucose_value_mgdl),
      max_glucose = max(glucose_value_mgdl),
      avg_carbs = mean(total_carb),
      avg_protein = mean(protein),
      avg_fat = mean(total_fat),
      n_meals = n_distinct(food_log_id)
    )
}

interactive_viz_10 <- calculate_meal_stats(prepped_food_10)

create_food_analysis <- function(data) {
  # Ensure plotly is loaded
  require(plotly)
  
  # Create a scatter plot
  plot_ly(data) %>%
    add_trace(
      x = ~avg_carbs, 
      y = ~avg_glucose,
      text = ~logged_food,
      size = ~avg_protein,
      type = 'scatter',
      mode = 'markers',
      marker = list(
        colorscale = 'Viridis',
        color = ~avg_fat,
        showscale = TRUE,
        sizeref = 0.1
      ),
      hovertemplate = paste(
        "<b>%{text}</b><br>",
        "Carbs: %{x}g<br>",
        "Glucose: %{y}mg/dL<br>",
        "Protein (size): %{marker.size}g<br>",
        "Fat (color): %{marker.color}g"
      )
    ) %>%
    layout(
      title = list(text = "Food Nutrient Analysis"),  # Updated title syntax
      xaxis = list(title = "Carbohydrates (g)"),
      yaxis = list(title = "Glucose Response (mg/dL)")
    )
}

create_food_analysis(interactive_viz_10)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Warning: Ignoring 1 observations
## Warning: `line.width` does not currently support multiple values.

PART 2. Now instead of visualizing data from exported tables. Query the SQL database using dbConnect() and getQuery() and Justin’s combined function that was integrated into pgAdmin4/Unix. This approach will allow for more flexibility to generate results more flexibly and reproducibly.

library(DBI)
library(RPostgres)
library(dplyr)
library(ggplot2)
library(patchwork)
library(tidyr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(cluster)
library(plotly)

#more functions Neal

library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(tidyr)
library(scales)
library(forcats)

This step will connect to the database: :5432

# Replace with your actual database details
# this will return: <PqConnection> final_justin_db_11424@localhost:5432
con <- dbConnect(
  RPostgres::Postgres(),
  dbname = "final_justin_db_11424",   # Name of the database
  host = "localhost",              # For local database
  port = 5432,                     # Default PostgreSQL port
  user = ,          # Your PostgreSQL username
  password =       # Your PostgreSQL password
)

con
## <PqConnection> final_justin_db_11424@localhost:5432

We can query the database object “con” for individual participants, e.g. participant id # 2 and get the activity log and food tables for this participant directly with one line of code. We can confirm there are the total number of participants in the SQLdb

two_activity <- dbGetQuery(con, "SELECT * FROM get_participant_activity_data(2);")
two_food <- dbGetQuery(con, "SELECT * FROM get_participant_food_data(2);")
participant <- dbGetQuery(con, "SELECT * FROM participant")

Here are all the tables in the SQLdb

# this worked 12-1-2024 (I had no user or password names in the db set up)

dbListTables(con)
##  [1] "activity"    "bvp"         "dexcom"      "eda"         "food_log"   
##  [6] "hr"          "ibi"         "nutrition"   "participant" "temp"

Replicate the visualization for one participant (No. 6)

# # Code generated with GenAI (Claude). Just visualize one participant.

six_food <- dbGetQuery(con, "SELECT * FROM get_participant_food_data(6);")

# Data preparation function
prepare_nutrition_data <- function(df) {
  df %>%
    mutate(
      dexcom_datetime = ymd_hms(dexcom_datetime),
      food_log_datetime = ymd_hms(paste(food_log_date, food_log_time)),
      # Calculate time since meal for each glucose reading
      time_since_meal = as.numeric(difftime(dexcom_datetime, food_log_datetime, units = "mins")),
      # Create meal categories
      meal_category = case_when(
        hour(food_log_datetime) < 11 ~ "Breakfast",
        hour(food_log_datetime) < 16 ~ "Lunch",
        TRUE ~ "Dinner"
      ),
      # Calculate macro ratios
      carb_ratio = total_carb / (total_carb + protein + total_fat),
      protein_ratio = protein / (total_carb + protein + total_fat),
      fat_ratio = total_fat / (total_carb + protein + total_fat)
    )
}

prep_6 <- prepare_nutrition_data(six_food)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `food_log_datetime = ymd_hms(paste(food_log_date,
##   food_log_time))`.
## Caused by warning:
## !  2801 failed to parse.
plot_daily_patterns <- function(df) {
  df %>%
    ggplot(aes(x = hour(food_log_datetime), y = glucose_value_mgdl)) +
    geom_point(aes(color = total_carb, size = calorie), alpha = 0.6) +
    geom_smooth(method = "loess", color = "blue") +
    scale_color_viridis_c("Total Carbs (g)") +
    theme_minimal() +
    labs(
      title = "Daily Glucose Patterns with Meal Information",
      x = "Hour of Day",
      y = "Glucose (mg/dL)",
      size = "Calories"
    ) +
    scale_x_continuous(breaks = 0:23)
}

plot_daily_patterns(prep_6)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2801 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2801 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Used GenAI (Claude) to create new functions that queries the database.

# ClaudeAI generated code block.

library(dplyr)
library(ggplot2)
library(lubridate)
library(DBI)
library(viridis)
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
# Function to fetch data for multiple participants
get_participant_data_claudeai <- function(con, participant_ids) {
  # Initialize empty list to store data frames
  participant_data <- list()
  
  # Fetch data for each participant
  for (id in participant_ids) {
    query <- sprintf("SELECT * FROM get_participant_food_data(%d);", id)
    participant_data[[as.character(id)]] <- dbGetQuery(con, query) %>%
      mutate(participant_id = id)  # Add participant ID column
  }
  
  # Combine all data frames
  do.call(rbind, participant_data)
}

# Data preparation function (enhanced version)
prepare_nutrition_data_enhanced <- function(df) {
  df %>%
    mutate(
      dexcom_datetime = ymd_hms(dexcom_datetime),
      food_log_datetime = ymd_hms(paste(food_log_date, food_log_time)),
      # Calculate time since meal for each glucose reading
      time_since_meal = as.numeric(difftime(dexcom_datetime, food_log_datetime, units = "mins")),
      # Create meal categories
      meal_category = case_when(
        hour(food_log_datetime) < 11 ~ "Breakfast",
        hour(food_log_datetime) < 16 ~ "Lunch",
        TRUE ~ "Dinner"
      ),
      # Calculate macro ratios
      carb_ratio = total_carb / (total_carb + protein + total_fat),
      protein_ratio = protein / (total_carb + protein + total_fat),
      fat_ratio = total_fat / (total_carb + protein + total_fat)
    )
}

# Enhanced plotting function for multiple participants
plot_daily_patterns_enhanced <- function(df, facet = TRUE) {
  base_plot <- df %>%
    ggplot(aes(x = hour(food_log_datetime), y = glucose_value_mgdl)) +
    geom_point(aes(color = total_carb, size = calorie), alpha = 0.6) +
    geom_smooth(method = "loess", color = "blue") +
    scale_color_viridis_c("Total Carbs (g)") +
    theme_minimal() +
    labs(
      title = "Daily Glucose Patterns with Meal Information",
      x = "Hour of Day",
      y = "Glucose (mg/dL)",
      size = "Calories"
    ) +
    scale_x_continuous(breaks = 0:23)
    
  if (facet) {
    base_plot + facet_wrap(~participant_id, scales = "free_y")
  } else {
    base_plot
  }
}

# Function for combined view with participant differentiation
plot_daily_patterns_combined_2 <- function(df) {
  df %>%
    filter(!is.na(glucose_value_mgdl)) %>%
    ggplot(aes(x = hour(food_log_datetime), y = glucose_value_mgdl, 
               color = factor(participant_id))) +
    geom_point(aes(size = total_carb), alpha = 0.6) +
    geom_smooth(method = "loess", se = TRUE) +
    scale_color_viridis_d("Participant ID") +
    scale_size_continuous("Total Carbs (g)") +
    theme_minimal() +
    theme(
      legend.position = "right",
      plot.title = element_text(size = 14, face = "bold")
    ) +
    labs(
      title = "Combined Daily Glucose Patterns",
      x = "Hour of Day",
      y = "Glucose (mg/dL)"
    ) +
    scale_x_continuous(breaks = seq(0, 24, by = 4))
}


# Function to analyze glucose response patterns
analyze_glucose_response_claudeai <- function(df) {
  df %>%
    group_by(participant_id, meal_category) %>%
    summarise(
      avg_glucose = mean(glucose_value_mgdl, na.rm = TRUE),
      max_glucose = max(glucose_value_mgdl, na.rm = TRUE),
      avg_carbs = mean(total_carb, na.rm = TRUE),
      n_meals = n_distinct(food_log_datetime),
      .groups = "drop"
    )
}

Now try the functions out to query the databased for more than one patient!

# GenAI generated code block

participant_ids_claudeAI <- c(6,10)

six_10_get <- get_participant_data_claudeai(con, participant_ids_claudeAI)

try_6_10 <- prepare_nutrition_data_enhanced(six_10_get)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `food_log_datetime = ymd_hms(paste(food_log_date,
##   food_log_time))`.
## Caused by warning:
## !  4874 failed to parse.
plot_daily_patterns_enhanced(try_6_10)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4874 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4874 rows containing missing values or values outside the scale range
## (`geom_point()`).

Obtain all individuals and visualize activity and glucose levels as a scatter plot (not visually stunning)

# Code generated with GenAI (Claude)

participant_ids_14_individuals <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14)

# Function to fetch data for multiple participants ACTIVITY data

get_participant_data_claudeai <- function(con, participant_ids) {
  # Initialize empty list to store data frames
  participant_data <- list()
  
  # Fetch data for each participant
  for (id in participant_ids) {
    query <- sprintf("SELECT * FROM get_participant_activity_data(%d);", id)
    participant_data[[as.character(id)]] <- dbGetQuery(con, query) %>%
      mutate(participant_id = id)  # Add participant ID column
  }
  
  # Combine all data frames
  do.call(rbind, participant_data)
}

fourteen_participant_examples <- get_participant_data_claudeai(con, participant_ids_14_individuals)

glimpse(fourteen_participant_examples)
## Rows: 28,085
## Columns: 11
## $ dexcom_id                <chr> "2020-02-21 11:08:36_2", "2020-02-21 11:13:36…
## $ dexcom_datetime          <dttm> 2020-02-21 11:08:36, 2020-02-21 11:13:36, 20…
## $ transmitter_time         <int64> 9901, 10201, 10501, 10801, 11101, 11401, 11…
## $ glucose_value_mgdl       <dbl> 186, 181, 175, 168, 163, 160, 157, 155, 151, …
## $ event_type               <chr> "EGV", "EGV", "EGV", "EGV", "EGV", "EGV", "EG…
## $ source_device_id         <chr> "Android G6", "Android G6", "Android G6", "An…
## $ activity_datetime        <dttm> 2020-02-21 11:08:36, 2020-02-21 11:13:36, 20…
## $ avg_magnitude            <dbl> 63.19019, 63.70243, 64.31174, 64.03124, 63.37…
## $ average_hr               <dbl> 89.98, 87.20, 88.60, 89.52, 85.95, 85.45, 81.…
## $ activity_intensity_score <dbl> 79.26408, 77.80097, 78.88470, 79.32450, 76.92…
## $ participant_id           <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …

Create a dendogram to cluster particpants

# Code generated with GenAI (Claude)

# Load required libraries
library(tidyverse)
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Function to create feature matrix for clustering
create_feature_matrix <- function(data) {
  features <- data %>%
    group_by(participant_id) %>%
    summarise(
      # Glucose metrics
      mean_glucose = mean(glucose_value_mgdl, na.rm = TRUE),
      sd_glucose = sd(glucose_value_mgdl, na.rm = TRUE),
      cv_glucose = sd_glucose / mean_glucose * 100,
      
      # Heart rate metrics
      mean_hr = mean(average_hr, na.rm = TRUE),
      sd_hr = sd(average_hr, na.rm = TRUE),
      
      # Activity metrics
      mean_activity = mean(activity_intensity_score, na.rm = TRUE),
      sd_activity = sd(activity_intensity_score, na.rm = TRUE),
      
      # Correlations
      glucose_hr_corr = cor(glucose_value_mgdl, average_hr, use = "complete.obs"),
      glucose_activity_corr = cor(glucose_value_mgdl, activity_intensity_score, use = "complete.obs"),
      hr_activity_corr = cor(average_hr, activity_intensity_score, use = "complete.obs")
    )
  
  # Scale the features
  features_scaled <- features %>%
    column_to_rownames("participant_id") %>%
    scale()
  
  return(list(
    raw = features,
    scaled = features_scaled
  ))
}

# Function to perform hierarchical clustering
perform_clustering <- function(features_scaled) {
  # Calculate distance matrix
  dist_matrix <- dist(features_scaled, method = "euclidean")
  
  # Perform hierarchical clustering
  hc <- hclust(dist_matrix, method = "ward.D2")
  
  return(hc)
}

# Function to visualize clusters in different dimensions
plot_cluster_analysis <- function(features_raw, hc, k = 2) {
  # Get cluster assignments
  clusters <- cutree(hc, k = k)
  
  # Add cluster assignments to raw features
  features_with_clusters <- features_raw %>%
    mutate(cluster = as.factor(clusters))
  
  # Create plots
  p1 <- ggplot(features_with_clusters, 
               aes(x = mean_glucose, y = mean_hr, color = cluster)) +
    geom_point(size = 3) +
    labs(title = "Clusters by Mean Glucose and Heart Rate",
         x = "Mean Glucose (mg/dL)",
         y = "Mean Heart Rate (bpm)") +
    theme_minimal()
  
  p2 <- ggplot(features_with_clusters, 
               aes(x = mean_activity, y = cv_glucose, color = cluster)) +
    geom_point(size = 3) +
    labs(title = "Clusters by Activity and Glucose Variability",
         x = "Mean Activity Intensity",
         y = "Glucose Coefficient of Variation (%)") +
    theme_minimal()
  
  # Return both plots in a list
  return(list(glucose_hr = p1, activity_var = p2))
}

# Main analysis pipeline
analyze_participants <- function(data) {
  # Create feature matrix
  features <- create_feature_matrix(data)
  
  # Perform clustering
  hc <- perform_clustering(features$scaled)
  
  # Create visualizations
  plots <- plot_cluster_analysis(features$raw, hc)
  
  # Return results
  return(list(
    features = features,
    clustering = hc,
    plots = plots
  ))
}

# Run the analysis on your data
results <- analyze_participants(fourteen_participant_examples)

# Print feature summary
print(results$features$raw)
## # A tibble: 12 × 11
##    participant_id mean_glucose sd_glucose cv_glucose mean_hr sd_hr mean_activity
##             <dbl>        <dbl>      <dbl>      <dbl>   <dbl> <dbl>         <dbl>
##  1              2        130.        20.6       15.8    82.1  15.1          75.2
##  2              4        113.        18.0       16.0    79.8  15.2          74.0
##  3              5        105.        15.3       14.6    77.7  14.9          72.1
##  4              6        125.        28.9       23.2    80.7  17.9          73.9
##  5              7         93.1       18.2       19.6    74.2  15.9          70.3
##  6              8        111.        17.6       15.8    77.0  15.0          71.8
##  7              9        128.        23.8       18.6    77.1  16.0          72.6
##  8             10        112.        28.8       25.7    74.8  20.6          70.5
##  9             11        119.        24.6       20.6    74.5  17.3          70.2
## 10             12        122.        19.5       16.0    73.9  20.4          70.4
## 11             13        127.        22.6       17.7    69.8  17.4          67.4
## 12             14        116.        20.8       17.9    77.3  12.9          72.2
## # ℹ 4 more variables: sd_activity <dbl>, glucose_hr_corr <dbl>,
## #   glucose_activity_corr <dbl>, hr_activity_corr <dbl>
# Plot dendrogram
plot(results$clustering, main = "Participant Clustering Dendrogram")

# Display cluster plots
# print(results$plots$glucose_hr)
# print(results$plots$activity_var)

Create dendogram but as a heatmap to show correlation coefficients that are are used to cluster particpants.

# Code generated with GenAI (Claude)

library(tidyverse)
library(pheatmap)
library(viridis)

create_correlation_heatmap_2_new_features <- function(data) {
  features <- data %>%
    group_by(participant_id) %>%
    summarise(
      `Glucose-HR Corr` = cor(glucose_value_mgdl, average_hr, use = "complete.obs"),
      `Glucose-Activity Corr` = cor(glucose_value_mgdl, activity_intensity_score, use = "complete.obs"),
      `HR-Activity Corr` = cor(average_hr, activity_intensity_score, use = "complete.obs"),
      # New correlations
      `Activity-Glucose Change` = cor(diff(activity_intensity_score), diff(glucose_value_mgdl), use = "complete.obs"),
      `Activity Duration-Glucose` = cor(lag(activity_intensity_score), glucose_value_mgdl, use = "complete.obs")
    )
  
  feature_matrix <- features %>%
    column_to_rownames("participant_id") %>%
    as.matrix()
  
  feature_matrix_scaled <- scale(feature_matrix)
  
  png("participant_correlation_heatmap.png", width = 100, height = 300, res = 300)
  
  heatmap_obj <- pheatmap(feature_matrix_scaled,
           clustering_method = "ward.D2",
           clustering_distance_rows = "euclidean",
           clustering_distance_cols = "euclidean",
           show_rownames = TRUE,
           show_colnames = TRUE,
           main = "Participant Physiological Response Patterns",
           angle_col = 45,
           fontsize = 9,
           fontsize_row = 9,
           cellwidth = 15,
           cellheight = 15,
           color = viridis(100),
           display_numbers = FALSE,
           number_format = "%.2f",
           number_color = "black",
           border_color = "white",
           treeheight_row = 35,
           treeheight_col = 3,
           margins = c(200, 200))
  
  dev.off()
  
  return(heatmap_obj)
}

create_correlation_heatmap_2_new_features(fourteen_participant_examples)

Acknowledgements and Resources

Code available on GitHub https://github.com/NCodyBmi2024/Healthcare-db-mgmt-fall-2024

Justin Veerasami curated the database content from the original datasource, provided detailed steps to connect to the SQLdb and organized the code on GitHub.

Isaac Oyediran provided goals and objectives, reproduced analysis for selected particpants and presented key takeaways from each of the visualizations

Feedback and support from TAs and Mark Grivanis during the fall semester.